# Load the Excel reader, installing it only when it is missing so that
# re-running the notebook does not reinstall the package every time.
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl")
}
library(readxl)

# Read the balance-sheet workbook from the working directory.
df <- read_excel("italy_balances.xlsx")
names(df)
# Drop the first four columns; only the remaining ones are used below.
df <- df[-c(1, 2, 3, 4)]

Una prima possibilità è concentrarci solo sugli indici di bilancio

# Keep only the five profitability ratios analysed in the rest of the notebook.
ratio_columns <- c(
  "R.O.E. (Return on Equity)",
  "R.O.I. Cerved (Return on Investment)",
  "R.O.S. (Return on Sales)",
  "ROT (Return On Turnover) FATTURATO / ATTIVO SP",
  "R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP"
)
df2 <- df[ratio_columns]

# Per-column summary statistics of the selected ratios.
summary(df2)
 R.O.E. (Return on Equity) R.O.I. Cerved (Return on Investment) R.O.S. (Return on Sales) ROT (Return On Turnover) FATTURATO / ATTIVO SP
 Min.   :-945.92           Min.   :-80.330                      Min.   :-889.6400        Min.   :0.01636                               
 1st Qu.:   0.17           1st Qu.:  0.780                      1st Qu.:   1.1800        1st Qu.:1.34801                               
 Median :  14.43           Median :  6.440                      Median :   5.0700        Median :2.24448                               
 Mean   :  14.89           Mean   :  9.336                      Mean   :  -0.5772        Mean   :2.35798                               
 3rd Qu.:  46.00           3rd Qu.: 17.260                      3rd Qu.:  10.0600        3rd Qu.:3.16350                               
 Max.   : 475.94           Max.   : 65.150                      Max.   :  89.3500        Max.   :9.06781                               
 R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP
 Min.   :-80.500                                         
 1st Qu.:  0.880                                         
 Median :  5.640                                         
 Mean   :  8.688                                         
 3rd Qu.: 16.090                                         
 Max.   : 62.820                                         
# Install plotly only when it is not already available.
if (!requireNamespace("plotly", quietly = TRUE)) {
  install.packages("plotly")
}
apertura URL 'https://cran.rstudio.com/bin/macosx/contrib/4.2/plotly_4.10.3.tgz'
Content type 'application/x-gzip' length 3202729 bytes (3.1 MB)
==================================================
downloaded 3.1 MB

I pacchetti binari scaricati sono in
    /var/folders/1m/gxy5wsh93qq112t_f6ddt48w0000gn/T//Rtmp6IaPTV/downloaded_packages
library(plotly)
Caricamento del pacchetto richiesto: ggplot2
Registered S3 method overwritten by 'data.table':
  method           from
  print.data.table     
Registered S3 method overwritten by 'htmlwidgets':
  method           from         
  print.htmlwidget tools:rstudio

Caricamento pacchetto: ‘plotly’

Il seguente oggetto è mascherato da ‘package:ggplot2’:

    last_plot

Il seguente oggetto è mascherato da ‘package:stats’:

    filter

Il seguente oggetto è mascherato da ‘package:graphics’:

    layout
# Scatter-plot matrix of all the selected ratios.
plot(df2)


# Explore the dataset: ROI against ROA.
# The columns are referenced through formulas so that `data = df2` is actually
# used, and the scatter mode is stated explicitly so plotly does not have to
# guess it (it otherwise prints a "No scatter mode specified" message).
plot_ly(
  data = df2,
  x = ~`R.O.I. Cerved (Return on Investment)`,
  y = ~`R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP`,
  type = "scatter",
  mode = "markers"
)
No scatter mode specifed:
  Setting the mode to markers
  Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
No scatter mode specifed:
  Setting the mode to markers
  Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode

Splitting

# Reproducible random split of df2: 80% of the rows go to the training set,
# the remaining rows to the test set.
set.seed(0)
n_rows <- nrow(df2)
splitting <- sample(seq_len(n_rows), 0.8 * n_rows)
train_data <- df2[splitting, ]
test_data <- df2[-splitting, ]

Prima regressione

# Simple linear regression of ROI on ROA, fitted on the training set.
# Variables are referenced by column name only: writing `train_data$...`
# inside the formula hard-wires the training vectors into the model, so that
# predict(model, newdata) ignores `newdata` and returns one value per training
# row (with a warning about the mismatching number of rows).
model <- lm(
  `R.O.I. Cerved (Return on Investment)` ~
    `R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP`,
  data = train_data
)
summary(model)

Call:
lm(formula = train_data$`R.O.I. Cerved (Return on Investment)` ~ 
    train_data$`R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP`, 
    data = train_data)

Residuals:
    Min      1Q  Median      3Q     Max 
-79.553  -1.072  -0.299   1.011  35.456 

Coefficients:
                                                                      Estimate Std. Error t value Pr(>|t|)    
(Intercept)                                                            1.05846    0.23495   4.505 7.82e-06 ***
train_data$`R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP`  0.96170    0.01409  68.271  < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 5.275 on 674 degrees of freedom
Multiple R-squared:  0.8737,    Adjusted R-squared:  0.8735 
F-statistic:  4661 on 1 and 674 DF,  p-value: < 2.2e-16

Analizziamo la regressione

# Packages used by the regression diagnostics below. Each one is installed
# only when missing, so re-running the chunk does not reinstall them.
for (pkg in c("lmtest", "mvinfluence")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(lmtest)
library(mvinfluence)

library(car)
# 1. LINEARITY (look at the Residuals vs Fitted panel)
# Draws the standard diagnostic plots of the fitted lm object.
plot(model)



# 2. INDEPENDENCE OF THE RESIDUALS
# Durbin-Watson test: are the errors autocorrelated? (two-sided alternative)
dwtest(model, alternative = "two.sided")

    Durbin-Watson test

data:  model
DW = 1.9611, p-value = 0.6117
alternative hypothesis: true autocorrelation is not 0
# 3. NORMALITY OF THE RESIDUALS
# The Shapiro-Wilk test is run on the residuals to check the distribution of
# the errors; read it together with the normal Q-Q plot.
# In the output recorded in this notebook W = 0.56165 with p-value < 2.2e-16,
# i.e. normality of the residuals is rejected.
shapiro.test(model$residuals)

    Shapiro-Wilk normality test

data:  model$residuals
W = 0.56165, p-value < 2.2e-16
# HOMOSCEDASTICITY vs HETEROSCEDASTICITY: Breusch-Pagan test
# (H1: the error variance is not constant).
# Read it together with the scale-location plot
# and with the residuals-vs-fitted plot.
bptest(model) # recorded output: BP = 2.5448, p-value = 0.1107, so H0 (constant variance) is NOT rejected at the 5% level

    studentized Breusch-Pagan test

data:  model
BP = 2.5448, df = 1, p-value = 0.1107
# 4. EXTREME VALUES: OUTLIERS (in Y) AND HIGH-LEVERAGE OBSERVATIONS (in X)
influencePlot(model)

influenceIndexPlot(model)

# NOTE(review): for an lm object this looks like it draws the same index plots
# as influenceIndexPlot() above - confirm, and drop one of the two if so.
infIndexPlot(model)

# Cook's distance of every training observation, as a bar chart.
barplot(cooks.distance(model))


# 5. MULTICOLLINEARITY (a value above 10 for a variable is considered a problem)
# Variance Inflation Factor
# Left disabled in the original. NOTE(review): presumably because `model` has a
# single predictor, so there is nothing to compare - confirm.
# vif(model)

Opterei per eliminarli perché danno problemi anche sul Q-Q plot e sul test di Shapiro

# Inspect the observations flagged as problematic.
# NOTE(review): these indices are assumed to be the point labels read off the
# influence plots of `model`. Those labels are row positions within
# `train_data` (the data the model was fitted on), not within `df2`, so the
# rows are taken from `train_data` - confirm the origin of the indices.
train_data[c(284, 468, 478, 616), ]

Con tutti i predittori per confronto anche con NN

# Full linear model: ROI regressed on all the other ratios in train_data.
# The response is named directly in the formula (no `train_data$` prefix), so
# `.` expands to every remaining column and every variable is resolved through
# `data`, in the same way as for the single-predictor model.
model2 <- lm(`R.O.I. Cerved (Return on Investment)` ~ ., data = train_data)
summary(model2)

Call:
lm(formula = train_data$`R.O.I. Cerved (Return on Investment)` ~ 
    ., data = train_data)

Residuals:
    Min      1Q  Median      3Q     Max 
-79.239  -1.179  -0.113   1.064  35.570 

Coefficients:
                                                            Estimate Std. Error t value Pr(>|t|)    
(Intercept)                                                 0.086436   0.406027   0.213  0.83148    
`R.O.E. (Return on Equity)`                                -0.005140   0.003192  -1.610  0.10778    
`R.O.S. (Return on Sales)`                                 -0.001776   0.004370  -0.406  0.68464    
`ROT (Return On Turnover) FATTURATO / ATTIVO SP`            0.427092   0.147208   2.901  0.00384 ** 
`R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP`  0.967643   0.016759  57.740  < 2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 5.246 on 671 degrees of freedom
Multiple R-squared:  0.8756,    Adjusted R-squared:  0.8749 
F-statistic:  1181 on 4 and 671 DF,  p-value: < 2.2e-16
# Squared residual standard error of model2, used here as its (training) MSE.
model2_summary <- summary(model2)
model2_summary$sigma^2
[1] 27.51841
# Standard lm diagnostic plots for the full model.
plot(model2)

# Influence plot for the same model. The original call passed `model`, which
# only repeated the plot already drawn for the single-predictor regression.
influencePlot(model2)
NA

proviamo le predizioni con il test_data

# Side-by-side comparison of the observed test-set ROI with the predictions of
# the two linear models.
test <- data.frame(
  actual = test_data$`R.O.I. Cerved (Return on Investment)`,
  preds1 = predict(model, newdata = test_data),
  preds2 = predict(model2, newdata = test_data)
)
# data.frame() silently recycles columns whose length divides the longest one.
# If a model returns one prediction per *training* row (which happens when its
# formula references `train_data$...` directly, so `newdata` is ignored), the
# table would be padded with repeated values instead of failing. Fail loudly.
if (nrow(test) != nrow(test_data)) {
  stop(
    "Prediction table has ", nrow(test), " rows but test_data has ",
    nrow(test_data), ": at least one model ignored `newdata`.",
    call. = FALSE
  )
}
Avvertimento: 'newdata' ha 169 righe ma la variabile trovata ha 676 righe
# Display the comparison table built above.
test

Il secondo modello è decisamente migliore nel fare predizioni rispetto al primo, ma spesso si discosta parecchio dai valori reali

Modello Decision Tree

library(ggplot2)
library(lattice)
library(caret)
library(rpart)
library(rpart.plot)

# Regression tree for ROI on all the other ratios in train_data.
# The response is named directly in the formula (no `train_data$` prefix) so
# that every variable is resolved through `data`; `cp` is the complexity
# parameter threshold used by rpart.
model_dt <- rpart(
  `R.O.I. Cerved (Return on Investment)` ~ .,
  data = train_data,
  cp = 0.01
)

# Text listing of the tree nodes.
print(model_dt)
n= 676 

node), split, n, deviance, yval
      * denotes terminal node

 1) root 676 148460.1000   9.1462130  
   2) R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP< 15.07 500  53405.7000   2.8620800  
     4) R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP< -28.435 8   2823.7820 -46.5100000 *
     5) R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP>=-28.435 492  30764.0100   3.6648780  
      10) R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP< 3.795 271  13396.0400  -0.8597048  
        20) R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP< -4.845 49   1220.0290  -7.3075510 *
        21) R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP>=-4.845 222   9689.2050   0.5634685 *
      11) R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP>=3.795 221   5017.0580   9.2131220  
        22) R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP< 9.075 127   2120.6550   6.8003150 *
        23) R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP>=9.075 94   1158.1480  12.4729800 *
   3) R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP>=15.07 176  19215.0600  26.9988600  
     6) R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP< 30.945 133   3815.9480  22.3776700  
      12) R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP< 22.325 80    746.4064  19.3663800 *
      13) R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP>=22.325 53   1249.1190  26.9230200 *
     7) R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP>=30.945 43   3773.8060  41.2923300  
      14) R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP< 44.875 31   1291.8590  36.8500000 *
      15) R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP>=44.875 12    289.8000  52.7683300 *
# Detailed report of the tree: CP table, variable importance and, for every
# node, its primary and surrogate splits.
summary(model_dt)
Call:
rpart(formula = train_data$`R.O.I. Cerved (Return on Investment)` ~ 
    ., data = train_data, cp = 0.01)
  n= 676 

          CP nsplit rel error    xerror       xstd
1 0.51084002      0 1.0000000 1.0012834 0.10703886
2 0.13348974      1 0.4891600 0.4978523 0.08186267
3 0.08319349      2 0.3556702 0.4712059 0.07754073
4 0.07830590      3 0.2724768 0.3861187 0.07048742
5 0.01675066      4 0.1941709 0.2312154 0.04720192
6 0.01476590      5 0.1774202 0.2093518 0.04821259
7 0.01226203      6 0.1626543 0.2075929 0.04823238
8 0.01170856      7 0.1503923 0.1948070 0.04803373
9 0.01000000      8 0.1386837 0.1860572 0.04808283

Variable importance
R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP                                 R.O.S. (Return on Sales) 
                                                      58                                                       21 
                               R.O.E. (Return on Equity)           ROT (Return On Turnover) FATTURATO / ATTIVO SP 
                                                      20                                                        1 

Node number 1: 676 observations,    complexity param=0.51084
  mean=9.146213, MSE=219.6156 
  left son=2 (500 obs) right son=3 (176 obs)
  Primary splits:
      R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP < 15.07     to the left,  improve=0.51084000, (0 missing)
      R.O.E. (Return on Equity)                                < 15.81     to the left,  improve=0.41513470, (0 missing)
      R.O.S. (Return on Sales)                                 < 6.385     to the left,  improve=0.35622980, (0 missing)
      ROT (Return On Turnover) FATTURATO / ATTIVO SP           < 1.611753  to the left,  improve=0.09955888, (0 missing)
  Surrogate splits:
      R.O.S. (Return on Sales)                       < 9.61      to the left,  agree=0.861, adj=0.466, (0 split)
      R.O.E. (Return on Equity)                      < 28.495    to the left,  agree=0.855, adj=0.443, (0 split)
      ROT (Return On Turnover) FATTURATO / ATTIVO SP < 7.420978  to the left,  agree=0.741, adj=0.006, (0 split)

Node number 2: 500 observations,    complexity param=0.1334897
  mean=2.86208, MSE=106.8114 
  left son=4 (8 obs) right son=5 (492 obs)
  Primary splits:
      R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP < -28.435   to the left,  improve=0.37108220, (0 missing)
      R.O.S. (Return on Sales)                                 < 0.26      to the left,  improve=0.30635250, (0 missing)
      R.O.E. (Return on Equity)                                < 3.965     to the left,  improve=0.27261810, (0 missing)
      ROT (Return On Turnover) FATTURATO / ATTIVO SP           < 0.7025735 to the left,  improve=0.05462503, (0 missing)

Node number 3: 176 observations,    complexity param=0.0783059
  mean=26.99886, MSE=109.1765 
  left son=6 (133 obs) right son=7 (43 obs)
  Primary splits:
      R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP < 30.945    to the left,  improve=0.60501010, (0 missing)
      R.O.S. (Return on Sales)                                 < 12.795    to the left,  improve=0.23792960, (0 missing)
      R.O.E. (Return on Equity)                                < 39.165    to the left,  improve=0.10273760, (0 missing)
      ROT (Return On Turnover) FATTURATO / ATTIVO SP           < 1.479734  to the left,  improve=0.03835117, (0 missing)
  Surrogate splits:
      R.O.S. (Return on Sales)  < 18.405    to the left,  agree=0.807, adj=0.209, (0 split)
      R.O.E. (Return on Equity) < 90.925    to the left,  agree=0.778, adj=0.093, (0 split)

Node number 4: 8 observations
  mean=-46.51, MSE=352.9727 

Node number 5: 492 observations,    complexity param=0.08319349
  mean=3.664878, MSE=62.52848 
  left son=10 (271 obs) right son=11 (221 obs)
  Primary splits:
      R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP < 3.795     to the left,  improve=0.40147290, (0 missing)
      R.O.E. (Return on Equity)                                < 3.965     to the left,  improve=0.34501190, (0 missing)
      R.O.S. (Return on Sales)                                 < 0.6       to the left,  improve=0.33943730, (0 missing)
      ROT (Return On Turnover) FATTURATO / ATTIVO SP           < 1.611753  to the left,  improve=0.07742249, (0 missing)
  Surrogate splits:
      R.O.E. (Return on Equity)                      < 4.39      to the left,  agree=0.894, adj=0.765, (0 split)
      R.O.S. (Return on Sales)                       < 3.35      to the left,  agree=0.837, adj=0.638, (0 split)
      ROT (Return On Turnover) FATTURATO / ATTIVO SP < 1.793141  to the left,  agree=0.602, adj=0.113, (0 split)

Node number 6: 133 observations,    complexity param=0.01226203
  mean=22.37767, MSE=28.69134 
  left son=12 (80 obs) right son=13 (53 obs)
  Primary splits:
      R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP < 22.325    to the left,  improve=0.47705630, (0 missing)
      R.O.S. (Return on Sales)                                 < 6.885     to the left,  improve=0.12783850, (0 missing)
      R.O.E. (Return on Equity)                                < 31.035    to the left,  improve=0.05785069, (0 missing)
      ROT (Return On Turnover) FATTURATO / ATTIVO SP           < 1.263069  to the left,  improve=0.02107806, (0 missing)
  Surrogate splits:
      R.O.S. (Return on Sales)                       < 13.33     to the left,  agree=0.662, adj=0.151, (0 split)
      R.O.E. (Return on Equity)                      < 84.38     to the left,  agree=0.624, adj=0.057, (0 split)
      ROT (Return On Turnover) FATTURATO / ATTIVO SP < 1.015029  to the right, agree=0.617, adj=0.038, (0 split)

Node number 7: 43 observations,    complexity param=0.0147659
  mean=41.29233, MSE=87.76293 
  left son=14 (31 obs) right son=15 (12 obs)
  Primary splits:
      R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP < 44.875    to the left,  improve=0.58088490, (0 missing)
      R.O.S. (Return on Sales)                                 < 13.72     to the left,  improve=0.22156580, (0 missing)
      R.O.E. (Return on Equity)                                < 66.085    to the left,  improve=0.17764150, (0 missing)
      ROT (Return On Turnover) FATTURATO / ATTIVO SP           < 2.440472  to the left,  improve=0.03279907, (0 missing)

Node number 10: 271 observations,    complexity param=0.01675066
  mean=-0.8597048, MSE=49.43188 
  left son=20 (49 obs) right son=21 (222 obs)
  Primary splits:
      R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP < -4.845    to the left,  improve=0.18563730, (0 missing)
      R.O.S. (Return on Sales)                                 < 0.26      to the left,  improve=0.17634390, (0 missing)
      R.O.E. (Return on Equity)                                < 0.06      to the left,  improve=0.04779047, (0 missing)
      ROT (Return On Turnover) FATTURATO / ATTIVO SP           < 1.615666  to the left,  improve=0.03614259, (0 missing)
  Surrogate splits:
      R.O.S. (Return on Sales)  < -9.475    to the left,  agree=0.904, adj=0.469, (0 split)
      R.O.E. (Return on Equity) < -78.88    to the left,  agree=0.841, adj=0.122, (0 split)

Node number 11: 221 observations,    complexity param=0.01170856
  mean=9.213122, MSE=22.70162 
  left son=22 (127 obs) right son=23 (94 obs)
  Primary splits:
      R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP < 9.075     to the left,  improve=0.34646890, (0 missing)
      R.O.E. (Return on Equity)                                < 15.81     to the left,  improve=0.17146310, (0 missing)
      ROT (Return On Turnover) FATTURATO / ATTIVO SP           < 0.8746952 to the left,  improve=0.02946706, (0 missing)
      R.O.S. (Return on Sales)                                 < 13.165    to the left,  improve=0.01056238, (0 missing)
  Surrogate splits:
      R.O.E. (Return on Equity) < 21.66     to the left,  agree=0.692, adj=0.277, (0 split)
      R.O.S. (Return on Sales)  < 6.4       to the left,  agree=0.629, adj=0.128, (0 split)

Node number 12: 80 observations
  mean=19.36638, MSE=9.330081 

Node number 13: 53 observations
  mean=26.92302, MSE=23.56829 

Node number 14: 31 observations
  mean=36.85, MSE=41.67287 

Node number 15: 12 observations
  mean=52.76833, MSE=24.15 

Node number 20: 49 observations
  mean=-7.307551, MSE=24.89855 

Node number 21: 222 observations
  mean=0.5634685, MSE=43.64507 

Node number 22: 127 observations
  mean=6.800315, MSE=16.69807 

Node number 23: 94 observations
  mean=12.47298, MSE=12.32072 
# Variable importance of the tree, shown on the natural-log scale.
print(log(model_dt$variable.importance))
R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP                                 R.O.S. (Return on Sales) 
                                               11.758778                                                10.764507 
                               R.O.E. (Return on Equity)           ROT (Return On Turnover) FATTURATO / ATTIVO SP 
                                               10.714978                                                 7.547904 
# Draw the fitted tree.
rpart.plot(model_dt)

# Complexity-parameter table: CP, number of splits, relative error and
# cross-validated error with its standard deviation.
model_dt$cptable
          CP nsplit rel error    xerror       xstd
1 0.51084002      0 1.0000000 1.0012834 0.10703886
2 0.13348974      1 0.4891600 0.4978523 0.08186267
3 0.08319349      2 0.3556702 0.4712059 0.07754073
4 0.07830590      3 0.2724768 0.3861187 0.07048742
5 0.01675066      4 0.1941709 0.2312154 0.04720192
6 0.01476590      5 0.1774202 0.2093518 0.04821259
7 0.01226203      6 0.1626543 0.2075929 0.04823238
8 0.01170856      7 0.1503923 0.1948070 0.04803373
9 0.01000000      8 0.1386837 0.1860572 0.04808283
# Install partykit only when it is not already available.
if (!requireNamespace("partykit", quietly = TRUE)) {
  install.packages("partykit")
}
apertura URL 'https://cran.rstudio.com/bin/macosx/contrib/4.2/partykit_1.2-20.tgz'
Content type 'application/x-gzip' length 2412379 bytes (2.3 MB)
==================================================
downloaded 2.3 MB

I pacchetti binari scaricati sono in
    /var/folders/1m/gxy5wsh93qq112t_f6ddt48w0000gn/T//Rtmp6IaPTV/downloaded_packages
library(grid)
library(libcoin)
library(mvtnorm)
library(partykit)
# Convert the rpart fit into a partykit "party" object, which is what the
# plotting and prediction code below works with.
# The conversion is guarded so that re-running this chunk does not call
# as.party() on an object that has already been converted.
if (inherits(model_dt, "rpart")) {
  model_dt <- as.party(model_dt)
}
model_dt

Model formula:
train_data$`R.O.I. Cerved (Return on Investment)` ~ `R.O.E. (Return on Equity)` + 
    `R.O.S. (Return on Sales)` + `ROT (Return On Turnover) FATTURATO / ATTIVO SP` + 
    `R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP`

Fitted party:
[1] root
|   [2] R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP < 15.07
|   |   [3] R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP < -28.435: -46.510 (n = 8, err = 2823.8)
|   |   [4] R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP >= -28.435
|   |   |   [5] R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP < 3.795
|   |   |   |   [6] R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP < -4.845: -7.308 (n = 49, err = 1220.0)
|   |   |   |   [7] R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP >= -4.845: 0.563 (n = 222, err = 9689.2)
|   |   |   [8] R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP >= 3.795
|   |   |   |   [9] R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP < 9.075: 6.800 (n = 127, err = 2120.7)
|   |   |   |   [10] R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP >= 9.075: 12.473 (n = 94, err = 1158.1)
|   [11] R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP >= 15.07
|   |   [12] R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP < 30.945
|   |   |   [13] R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP < 22.325: 19.366 (n = 80, err = 746.4)
|   |   |   [14] R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP >= 22.325: 26.923 (n = 53, err = 1249.1)
|   |   [15] R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP >= 30.945
|   |   |   [16] R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP < 44.875: 36.850 (n = 31, err = 1291.9)
|   |   |   [17] R.O.A. Cerved (Return on Assets) UTILE NETTO / ATTIVO SP >= 44.875: 52.768 (n = 12, err = 289.8)

Number of inner nodes:    8
Number of terminal nodes: 9
# Plot the tree: inner nodes without p-values and ids, terminal nodes as
# box plots labelled with their id.
inner_panel_fun <- node_inner(model_dt, pval = FALSE, id = FALSE)
terminal_panel_fun <- node_boxplot(model_dt, id = TRUE)
plot(
  model_dt,
  inner_panel = inner_panel_fun,
  terminal_panel = terminal_panel_fun
)

# Observed test-set ROI next to the ROI predicted by the tree.
newdata_dt <- data.frame(
  ROI_test = test_data[["R.O.I. Cerved (Return on Investment)"]]
)
newdata_dt[["ROIpred"]] <- predict(
  model_dt,
  newdata = test_data,
  type = "response"
)

Analisi predizioni con DT

# Prediction error of the tree on the test set (observed minus predicted).
# Despite its name, `std_err` is a plain residual, not a standard error.
newdata_dt[["std_err"]] <- with(newdata_dt, ROI_test - ROIpred)
newdata_dt
summary(newdata_dt)
    ROI_test         ROIpred           std_err         
 Min.   :-23.65   Min.   :-7.3075   Min.   :-17.81347  
 1st Qu.:  0.62   1st Qu.: 0.5635   1st Qu.: -2.54637  
 Median :  5.20   Median : 6.8003   Median :  0.03363  
 Mean   : 10.09   Mean   :10.3310   Mean   : -0.23730  
 3rd Qu.: 18.43   3rd Qu.:19.3664   3rd Qu.:  2.42969  
 Max.   : 65.15   Max.   :52.7683   Max.   : 23.31362  
# Test-set mean squared error of the decision tree
# (std_err holds ROI_test - ROIpred).
mean((newdata_dt$std_err)^2)
[1] 28.90742
# Prediction error of the tree for every test observation, coloured by its
# value. The x positions are derived from the number of rows instead of a
# hard-coded 169, and the trace type and mode are stated explicitly so plotly
# does not have to guess them.
plot_ly(
  data = newdata_dt,
  x = seq_len(nrow(newdata_dt)),
  y = newdata_dt$std_err,
  color = newdata_dt$std_err,
  type = "scatter",
  mode = "markers"
)
No trace type specified:
  Based on info supplied, a 'scatter' trace seems appropriate.
  Read more about this trace type -> https://plotly.com/r/reference/#scatter
No scatter mode specifed:
  Setting the mode to markers
  Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
No trace type specified:
  Based on info supplied, a 'scatter' trace seems appropriate.
  Read more about this trace type -> https://plotly.com/r/reference/#scatter
No scatter mode specifed:
  Setting the mode to markers
  Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode

Analisi predizioni con Regressione

# Linear-regression counterpart of the tree evaluation: observed test-set ROI,
# the prediction of model2, and their difference (observed minus predicted).
# Despite its name, `std_err` is a plain residual, not a standard error.
newdata_reg <- data.frame(
  ROI_test = test_data[["R.O.I. Cerved (Return on Investment)"]]
)
newdata_reg[["ROI_pred"]] <- predict(model2, newdata = test_data)


newdata_reg[["std_err"]] <- with(newdata_reg, ROI_test - ROI_pred)
newdata_reg
summary(newdata_reg)
    ROI_test         ROI_pred          std_err        
 Min.   :-23.65   Min.   :-25.054   Min.   :-20.3669  
 1st Qu.:  0.62   1st Qu.:  1.409   1st Qu.: -1.3373  
 Median :  5.20   Median :  6.104   Median : -0.1243  
 Mean   : 10.09   Mean   : 10.541   Mean   : -0.4468  
 3rd Qu.: 18.43   3rd Qu.: 16.951   3rd Qu.:  0.8666  
 Max.   : 65.15   Max.   : 60.476   Max.   : 26.3112  
# Test-set mean squared error of the linear model
# (std_err holds ROI_test - ROI_pred).
mean((newdata_reg$std_err)^2)
[1] 21.76269
# Prediction error of the linear model for every test observation, coloured by
# its value. The x positions are derived from the number of rows instead of a
# hard-coded 169, and the trace type and mode are stated explicitly so plotly
# does not have to guess them.
plot_ly(
  data = newdata_reg,
  x = seq_len(nrow(newdata_reg)),
  y = newdata_reg$std_err,
  color = newdata_reg$std_err,
  type = "scatter",
  mode = "markers"
)
No trace type specified:
  Based on info supplied, a 'scatter' trace seems appropriate.
  Read more about this trace type -> https://plotly.com/r/reference/#scatter
No scatter mode specifed:
  Setting the mode to markers
  Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
No trace type specified:
  Based on info supplied, a 'scatter' trace seems appropriate.
  Read more about this trace type -> https://plotly.com/r/reference/#scatter
No scatter mode specifed:
  Setting the mode to markers
  Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9Cmluc3RhbGwucGFja2FnZXMoInJlYWR4bCIpCmxpYnJhcnkocmVhZHhsKQoKZGYgPSByZWFkX2V4Y2VsKCJpdGFseV9iYWxhbmNlcy54bHN4IikKbmFtZXMoZGYpCmRmID0gZGZbLWMoMSwyLDMsNCldCgpgYGAKClVuYSBwcmltYSBwb3NzaWJpbGl0w6Agw6ggY29uY2V0cmFyY2kgc29sbyBzdWdsaSBpbmRpY2kgZGkgYmlsYW5jaW8KCmBgYHtyfQpkZjIgPSBkZltjKCJSLk8uRS4gKFJldHVybiBvbiBFcXVpdHkpIiwiUi5PLkkuIENlcnZlZCAoUmV0dXJuIG9uIEludmVzdG1lbnQpIiwiUi5PLlMuIChSZXR1cm4gb24gU2FsZXMpIiwiUk9UIChSZXR1cm4gT24gVHVybm92ZXIpIEZBVFRVUkFUTyAvIEFUVElWTyBTUCIsIlIuTy5BLiBDZXJ2ZWQgKFJldHVybiBvbiBBc3NldHMpIFVUSUxFIE5FVFRPIC8gQVRUSVZPIFNQIildIAoKc3VtbWFyeShkZjIpCmBgYAoKCmBgYHtyfQppbnN0YWxsLnBhY2thZ2VzKCJwbG90bHkiKQpsaWJyYXJ5KHBsb3RseSkKCnBsb3QoZGYyKQoKI0FuYWxpenphcmUgdW4gcG8gaWwgZGF0YXNldApwbG90X2x5KGRhdGEgPSBkZjIsIHg9ZGYyJGBSLk8uSS4gQ2VydmVkIChSZXR1cm4gb24gSW52ZXN0bWVudClgLCB5PWRmMiRgUi5PLkEuIENlcnZlZCAoUmV0dXJuIG9uIEFzc2V0cykgVVRJTEUgTkVUVE8gLyBBVFRJVk8gU1BgLCAgdHlwZSA9ICJzY2F0dGVyIikKYGBgCgpTcGxpdHRpbmcKCmBgYHtyfQpzZXQuc2VlZCgwKQpzcGxpdHRpbmcgPSBzYW1wbGUoMTpucm93KGRmMiksIDAuOCpucm93KGRmMikpCnRyYWluX2RhdGEgPSBkZjJbc3BsaXR0aW5nLCBdCnRlc3RfZGF0YSA9ICBkZjJbLSBzcGxpdHRpbmcsIF0KYGBgCgoKUHJpbWEgcmVncmVzc2lvbmUgCmBgYHtyfQptb2RlbCA9IGxtKCB0cmFpbl9kYXRhJGBSLk8uSS4gQ2VydmVkIChSZXR1cm4gb24gSW52ZXN0bWVudClgIH4gdHJhaW5fZGF0YSRgUi5PLkEuIENlcnZlZCAoUmV0dXJuIG9uIEFzc2V0cykgVVRJTEUgTkVUVE8gLyBBVFRJVk8gU1BgLCBkYXRhID0gdHJhaW5fZGF0YSkgCnN1bW1hcnkobW9kZWwpCmBgYAoKQW5hbGl6emlhbW8gbGEgcmVncmVzc2lvbmUgCgpgYGB7cn0KaW5zdGFsbC5wYWNrYWdlcygibG10ZXN0IikKbGlicmFyeShsbXRlc3QpCgppbnN0YWxsLnBhY2thZ2VzKCJtdmluZmx1ZW5jZSIpCmxpYnJhcnkobXZpbmZsdWVuY2UpCgpsaWJyYXJ5KGNhcikKYGBgCgoKYGBge3J9CiMxIExJTkVBUklUQScgREVJIERBVEkgKFJlc2lkdWFscyB2cyBGaXR0ZWQgdmFsdWVzKQpwbG90KG1vZGVsKQoKCiMyIElORElQRU5ERU5aQSBERUkgUkVTSURVSQojRHVicmluLVdhdHNvbiB0ZXN0IHBlciB2ZWRlcmUgc2UgZ2xpIGVycm9yaSBzb25vIGNvcnJlbGF0aQpkd3Rlc3QoZm9ybXVsYSA9IG1vZGVsLCAgYWx0ZXJuYXRpdmUgPSAidHdvLnNpZGVkIikKCiMzIERJU1RSSUJVWklPTkUgTk9STUFMRSBERUkgUkVTSURJVQojTG8gc2hhcGlybyBz
byBmYSBzdWkgcmVzaWR1aSBwZXIgdmVkZXJlIGxhIGRpc3RyaWJ1emlvbmUgZGVsZ2kgZXJyb3JpCiNEYSB2ZWRlcmUgaW5zaWVtZSBhbCBub3JtYWwgUS1RIHBsb3QKc2hhcGlyby50ZXN0KG1vZGVsJHJlc2lkdWFscykKCiNDb21lIHZhbHV0YXJlIEhPTU8gbyBFVEVSTyBTQ0hFREFTVElDSVRBJyBCcmV1c2NoLVBhZ2FuIFRlc3QgKHZhcmlhbnphIGVycm9yaSBub24gY29zdGFudGUgSDEpCiNJbnNpZW1lIGFsbG8gc2NhbGUtbG9jYXRpb24gcGxvdAojSW5zaWVtZSBhbCByZXNpZHVhbCB2cyBmaXR0ZWQgcGxvdApicHRlc3QobW9kZWwpICMgcC12YWx1ZSBiYXNzaXNpbW8sIGRvYmJpYW1vIHJpZml1dGFyZSBIMCwgcXVpbmlkaSB2YXJpYW56YSBub24gY29zdGFudGUKCgojNCBBTkFMSVNJIERFSSBWQUxPUkkgRVNUUkVNSSBPVVRMSUVSUyAoWSkgRUQgT1NTRVJWQVpJT05JIEFEIEFMVE8gTEVWRVJBR0UoWCkKaW5mbHVlbmNlUGxvdChtb2RlbCkKaW5mbHVlbmNlSW5kZXhQbG90KG1vZGVsKQppbmZJbmRleFBsb3QobW9kZWwpCmJhcnBsb3QoY29va3MuZGlzdGFuY2UobW9kZWwpKQoKIzUgTVVMVElDT0xJTkVBUklUQScgKG5vbiB2YSBiZW5lIHF1YW5kbyByaXRvcm5hIHZhbG9yaSBzdXBlcmlvcmkgYSAxMCBkZWxsYSB2YXJpYWJpbGUpCiNWYXJpYW5jZSBJbmZsYXRpb24gRmFjdG9yCiN2aWYobW9kZWwpCgpgYGAKCk9wdGVyZWkgcGVyIGVsaW1pbmFybGkgcGVyY2jDqCBkYW5ubyBwcm9ibGVtaSBhbmNoZSBzdWwgUSZRIGUgU2hhcGlybwpgYGB7cn0KZGYyW2MoMjg0LDQ2OCw0NzgsNjE2KSxdCmBgYAoKCgoKCkNvbiB0dXR0aSBpIHByZWRpdHRvcmkgcGVyIGNvbmZyb250byBhbmNoZSBjb24gTk4KCmBgYHtyfQptb2RlbDIgPSBsbSggdHJhaW5fZGF0YSRgUi5PLkkuIENlcnZlZCAoUmV0dXJuIG9uIEludmVzdG1lbnQpYCB+IC4sIGRhdGEgPSB0cmFpbl9kYXRhKSAKc3VtbWFyeShtb2RlbDIpCnN1bW1hcnkobW9kZWwyKSRzaWdtYV4yICNNU0UKcGxvdChtb2RlbDIpCmluZmx1ZW5jZVBsb3QobW9kZWwpCgpgYGAKCnByb3ZpYW1vIGxlIHByZWRpemlvbmkgY29uIGlsIHRlc3RfZGF0YQoKCmBgYHtyfQp0ZXN0ID0gZGF0YS5mcmFtZSgKICBhY3R1YWwgPSB0ZXN0X2RhdGEkYFIuTy5JLiBDZXJ2ZWQgKFJldHVybiBvbiBJbnZlc3RtZW50KWAsCiAgcHJlZHMxID0gcHJlZGljdChtb2RlbCwgdGVzdF9kYXRhKSwKICBwcmVkczIgPSBwcmVkaWN0KG1vZGVsMiwgdGVzdF9kYXRhKQopCgp0ZXN0CmBgYAoKCklsIHNlY29uZG8gbW9kZWxsbyDDqCBkZWNpc2FtZW50ZSBtaWdsaW9yZSBhIGZhcmUgcHJlZGl6aW9uaSByaXNwZXR0byBhbCBwcmltbywgbWEgc3Blc3NvIHNpIHNjb3N0YSBwYXJlY2NoaW8KCgojIE1vZGVsbG8gRGVjaXNpb24gVHJlZQoKYGBge3J9CmxpYnJhcnkoZ2dwbG90MikKbGlicmFyeShsYXR0aWNlKQpsaWJyYXJ5KGNhcmV0KQpsaWJyYXJ5KHJwYXJ0KQpsaWJyYXJ5KHJwYXJ0LnBsb3QpCgptb2RlbF9kdCA9IHJwYXJ0
KHRyYWluX2RhdGEkYFIuTy5JLiBDZXJ2ZWQgKFJldHVybiBvbiBJbnZlc3RtZW50KWAgfiAuLCBkYXRhPXRyYWluX2RhdGEsIGNwPTAuMDEwMDAwMDApCgpwcmludChtb2RlbF9kdCkKc3VtbWFyeShtb2RlbF9kdCkKcHJpbnQobG9nKG1vZGVsX2R0JHZhcmlhYmxlLmltcG9ydGFuY2UpKQpycGFydC5wbG90KG1vZGVsX2R0KQpgYGAKCmBgYHtyfQptb2RlbF9kdCRjcHRhYmxlCmBgYAoKCgoKYGBge3J9Cmluc3RhbGwucGFja2FnZXMoInBhcnR5a2l0IikKbGlicmFyeShncmlkKQpsaWJyYXJ5KGxpYmNvaW4pCmxpYnJhcnkobXZ0bm9ybSkKbGlicmFyeShwYXJ0eWtpdCkKbW9kZWxfZHQgPSBhcy5wYXJ0eShtb2RlbF9kdCkKbW9kZWxfZHQKCnBsb3QobW9kZWxfZHQsIGlubmVyX3BhbmVsID0gbm9kZV9pbm5lcihtb2RlbF9kdCwgcHZhbCA9IEZBTFNFLCBpZCA9IEZBTFNFKSwKICB0ZXJtaW5hbF9wYW5lbCA9IG5vZGVfYm94cGxvdChtb2RlbF9kdCwgaWQgPSBUUlVFKSkKYGBgCgpgYGB7cn0KbmV3ZGF0YV9kdD0gZGF0YS5mcmFtZShST0lfdGVzdCA9IHRlc3RfZGF0YSRgUi5PLkkuIENlcnZlZCAoUmV0dXJuIG9uIEludmVzdG1lbnQpYCkKbmV3ZGF0YV9kdCRST0lwcmVkID0gIHByZWRpY3QobW9kZWxfZHQsIHRlc3RfZGF0YSwgdHlwZT0icmVzcG9uc2UiKSAKYGBgCgojIEFuYWxpc2kgcHJlZGl6aW9uaSBjb24gRFQKYGBge3J9Cm5ld2RhdGFfZHQkc3RkX2VyciA9IG5ld2RhdGFfZHQkUk9JX3Rlc3QgLSBuZXdkYXRhX2R0JFJPSXByZWQKbmV3ZGF0YV9kdApzdW1tYXJ5KG5ld2RhdGFfZHQpCm1lYW4oKG5ld2RhdGFfZHQkc3RkX2VycileMikKcGxvdF9seShkYXRhID0gbmV3ZGF0YV9kdCwgeT1uZXdkYXRhX2R0JHN0ZF9lcnIsIHg9c2VxKDEsMTY5KSwgY29sb3I9bmV3ZGF0YV9kdCRzdGRfZXJyKQpgYGAKCiNBbmFsaXNpIHByZWRpemlvbmkgY29uIFJlZ3Jlc3Npb25lCgpgYGB7cn0KbmV3ZGF0YV9yZWcgPSBkYXRhLmZyYW1lKFJPSV90ZXN0ID0gdGVzdF9kYXRhJGBSLk8uSS4gQ2VydmVkIChSZXR1cm4gb24gSW52ZXN0bWVudClgKQpuZXdkYXRhX3JlZyRST0lfcHJlZCA9IHByZWRpY3QobW9kZWwyLCB0ZXN0X2RhdGEpCgoKbmV3ZGF0YV9yZWckc3RkX2VyciA9IG5ld2RhdGFfcmVnJFJPSV90ZXN0IC0gbmV3ZGF0YV9yZWckUk9JX3ByZWQKbmV3ZGF0YV9yZWcKc3VtbWFyeShuZXdkYXRhX3JlZykKbWVhbigobmV3ZGF0YV9yZWckc3RkX2VycileMikKcGxvdF9seShkYXRhID0gbmV3ZGF0YV9yZWcsIHk9bmV3ZGF0YV9yZWckc3RkX2VyciwgeD1zZXEoMSwxNjkpLCBjb2xvcj1uZXdkYXRhX3JlZyRzdGRfZXJyKQpgYGAKCgoK